Load libraries

rm(list=ls())

 library(pheatmap)
 library(ggplot2)
# library(matrixStats)
# library(wesanderson)
# library(clusterProfiler)
# library(enrichplot)
# library(msigdbr)
 library(dichromat)
# library(stringr)
 library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
 library(ggrepel)
 library(reshape2)
 library(umap)
 library(ggthemes)
 library(cowplot)
## 
## Attaching package: 'cowplot'
## The following object is masked from 'package:ggthemes':
## 
##     theme_map
#library(MetaboAnalystR)
library(vsn)
## Loading required package: Biobase
## Loading required package: BiocGenerics
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
## 
##     combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, aperm, append, as.data.frame, basename, cbind,
##     colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
##     get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
##     match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
##     Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
##     table, tapply, union, unique, unsplit, which.max, which.min
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
library(DEP)
## Warning in fun(libname, pkgname): mzR has been built against a different Rcpp version (1.0.10)
## than is installed on your system (1.0.11). This might lead to errors
## when loading mzR. If you encounter such issues, please send a report,
## including the output of sessionInfo() to the Bioc support forum at 
## https://support.bioconductor.org/. For details see also
## https://github.com/sneumann/mzR/wiki/mzR-Rcpp-compiler-linker-issue.
## 
## Attaching package: 'DEP'
## The following object is masked from 'package:vsn':
## 
##     meanSdPlot
library(readr)
library(naniar)
library(SummarizedExperiment)
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## The following object is masked from 'package:dplyr':
## 
##     count
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
## 
##     rowMedians
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:dplyr':
## 
##     first, rename
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## Loading required package: IRanges
## 
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
## 
##     collapse, desc, slice
## Loading required package: GenomeInfoDb
library(data.table)
## 
## Attaching package: 'data.table'
## The following object is masked from 'package:SummarizedExperiment':
## 
##     shift
## The following object is masked from 'package:GenomicRanges':
## 
##     shift
## The following object is masked from 'package:IRanges':
## 
##     shift
## The following objects are masked from 'package:S4Vectors':
## 
##     first, second
## The following objects are masked from 'package:reshape2':
## 
##     dcast, melt
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last

Set working directories

# if you are using Rstudio run the following command, otherwise, set the working directory to the folder where this script is in
setwd(dirname(rstudioapi::getActiveDocumentContext()$path))

# create directory for results
dir.create(file.path(getwd(),'results'), showWarnings = FALSE)
# create directory for plots
dir.create(file.path(getwd(),'plots'), showWarnings = FALSE)

Load data

abu_data = read.csv(file = "data/Metaboanalyst_HMDBmatch_duplicatedvalues_o26011_DI-IMS_neg_rscript_ALS_ctrl_gender_corrected_20230825_cleaned.csv")
#remove remove first row with clinical data
abu_data = abu_data[-1,]
row.names(abu_data) = make.unique(abu_data[,1])
HMDB = abu_data[,1]
abu_data = abu_data[,-1]
patids = colnames(abu_data)[1:103]
write_csv(abu_data[,1:103], file = "results/abundancy_metabolomics_clean_HMDB.csv")


#load new clinical dataset
clin = read.csv(file = "data/Clinical_information_FINAL.csv")
clin = clin[, c("MAXOMOD.ID","Tube.ID","CSF.proteomic..metabolomic..miRNA.ID", "Group", "Gender", "Age.at.collection", "NfL..pg.ml.", "Genetics", "Disease.onset", "Age.at.onset", "ALS.progresion.rate")]
colnames(clin) = c("maxomod_id","tube_id","patid", "disease", "sex", "age", "neurofilaments","genetics", "onset", "age_at_onset", "progression_rate")

#make variables factor or numeric
clin$disease = as.factor(clin$disease)
clin$sex = as.factor(clin$sex)
clin$onset = as.factor(clin$onset)
clin$neurofilaments = as.numeric(clin$neurofilaments)
## Warning: NAs introduced by coercion
clin$genetics = as.factor(clin$genetics)
levels(clin$genetics) = c("C9orf72", "negative", "negative", "not_performed", "negative")

#create extra categorical variable for age based on median
m = median(clin$age)
clin$age_cat = rep(NA, length(clin$age))
clin$age_cat[clin$age>=m] = "over_61"
clin$age_cat[clin$age<m] = "under_61"
clin$age_cat = as.factor(clin$age_cat)

rownames(clin) = clin$patid
clin = clin[patids ,] #align clinical variables with metabolomics data order

#separate abundancy values and compound information
compound_information = abu_data[,104:105]
compound_information$HMDB = HMDB
compound_information_unique_mass = compound_information[!duplicated(compound_information$correctedMZ), ]
rownames(compound_information_unique_mass) = compound_information_unique_mass$correctedMZ

#take mass instead of HMDB
abu_data <- abu_data[!duplicated(abu_data$correctedMZ), ]
length(abu_data$correctedMZ) == length(unique(abu_data$correctedMZ))
## [1] TRUE
rownames(abu_data) = abu_data$correctedMZ
abu_data = abu_data[, !(colnames(abu_data) %in% c("Compound","correctedMZ"))]

abu_data[abu_data == 0] = NA
abu_data <- mutate_all(abu_data, function(x) as.numeric(as.character(x)))

#make summarized experiment
abu_data$name = abu_data$ID = rownames(abu_data)
abundance.columns <- grep("CSF", colnames(abu_data)) # get abundance column numbers
experimental.design = clin[,c("patid","disease", "onset", "age", "sex", "neurofilaments", "genetics", "age_at_onset", "progression_rate","age_cat")]
colnames(experimental.design) = c("label","condition","onset", "age", "sex", "neurofilaments", "genetics", "age_at_onset", "progression_rate","age_cat")
experimental.design$replicate = 1:nrow(experimental.design)
se_abu_data <- make_se(abu_data, abundance.columns, experimental.design)

#make separate summarized experiment with only ALS patients
#and onset as condition variable
abu_data_ALS = abu_data[,clin$disease == "als"]
clin_ALS = clin[clin$disease == "als",]
abu_data_ALS$name = abu_data_ALS$ID = rownames(abu_data_ALS)
abundance.columns <- grep("CSF", colnames(abu_data_ALS)) # get abundance column numbers
experimental.design = clin_ALS[,c("patid", "onset", "age", "sex", "neurofilaments", "genetics", "age_at_onset", "progression_rate", "age_cat")]
colnames(experimental.design) = c("label","condition", "age", "sex", "neurofilaments", "genetics", "age_at_onset", "progression_rate", "age_cat")
experimental.design$replicate = 1:nrow(experimental.design)
se_abu_data_ALS <- make_se(abu_data_ALS, abundance.columns, experimental.design)

Missing inspection

#all patients
vis_miss(as.data.frame(assay(se_abu_data)), show_perc = TRUE, show_perc_col = TRUE, cluster = T)

ggsave("plots/missing_vis_miss_heatmap_before.pdf", width = 11, height = 8, units = "in")
#filter values that are missing more than 20% in at least one condition
se_abu_data_filtered = filter_missval(se_abu_data, thr = (0.2/2*ncol(assay(se_abu_data))))
vis_miss(as.data.frame(assay(se_abu_data_filtered)), show_perc = TRUE, show_perc_col = TRUE, cluster = T)

ggsave("plots/missing_vis_miss_heatmap_after.pdf", width = 11, height = 8, units = "in")

#only ALS patients
vis_miss(as.data.frame(assay(se_abu_data_ALS)), show_perc = TRUE, show_perc_col = TRUE, cluster = T)

ggsave("plots/missing_vis_miss_heatmap_before_ALS.pdf", width = 11, height = 8, units = "in")
#filter values that are missing more than 20% in at least one condition
se_abu_data_filtered_ALS = filter_missval(se_abu_data_ALS, thr = (0.2/2*ncol(assay(se_abu_data_ALS))))
vis_miss(as.data.frame(assay(se_abu_data_filtered_ALS)), show_perc = TRUE, show_perc_col = TRUE, cluster = T)

ggsave("plots/missing_vis_miss_heatmap_after_ALS.pdf", width = 11, height = 8, units = "in")

#dimensions of the data
dim(se_abu_data)
## [1] 444 103
dim(se_abu_data_filtered)
## [1] 438 103
dim(se_abu_data_ALS)
## [1] 444  51
dim(se_abu_data_filtered_ALS)
## [1] 438  51
#number of males and females
table(se_abu_data$sex)
## 
## Female   Male 
##     36     67
table(se_abu_data_ALS$sex)
## 
## Female   Male 
##     16     35
# % missing per patient:
round(apply(X = as.data.frame(assay(se_abu_data)), function(x) sum(is.na(x)), MARGIN = 2) / nrow(as.data.frame(assay(se_abu_data))) * 100 , 1)
##  ctrl_1  ctrl_2   als_3   als_4   als_5   als_6   als_7   als_8  ctrl_9  als_10 
##     0.9     1.1     1.4     1.6     1.6     1.4     1.4     1.8     1.4     1.4 
## ctrl_11 ctrl_12 ctrl_13 ctrl_14 ctrl_15 ctrl_16 ctrl_17 ctrl_18 ctrl_19 ctrl_20 
##     1.4     1.4     1.4     1.4     1.4     1.1     1.6     1.4     1.4     1.4 
## ctrl_21 ctrl_22 ctrl_23 ctrl_24 ctrl_25 ctrl_26 ctrl_27 ctrl_28 ctrl_29 ctrl_30 
##     1.4     1.6     1.4     1.4     1.4     1.6     1.4     1.4     1.4     1.4 
## ctrl_31 ctrl_32 ctrl_33 ctrl_34 ctrl_35 ctrl_36 ctrl_37 ctrl_38 ctrl_39 ctrl_40 
##     1.4     1.4     1.6     1.4     1.6     1.4     1.4     1.4     1.4     1.4 
## ctrl_41 ctrl_42 ctrl_43 ctrl_44 ctrl_45 ctrl_46 ctrl_47 ctrl_48 ctrl_49 ctrl_50 
##     0.9     1.1     1.4     1.4     1.4     0.7     1.4     1.4     1.4     1.4 
## ctrl_51 ctrl_52 ctrl_53 ctrl_54 ctrl_55  als_56  als_57  als_58  als_59 ctrl_60 
##     1.6     1.6     1.6     1.6     1.1     1.1     1.6     1.4     1.4     1.4 
##  als_61  als_62  als_63  als_64  als_65  als_66  als_67  als_68  als_69  als_70 
##     1.4     1.8     1.6     1.6     1.4     1.4     1.4     1.4     1.4     1.4 
## ctrl_71  als_72  als_73  als_74  als_75  als_76  als_77  als_78  als_79  als_80 
##     1.1     1.6     1.6     1.1     1.6     1.1     1.4     1.1     1.4     1.4 
##  als_81 ctrl_82  als_83  als_84  als_85  als_86  als_87  als_88  als_89  als_90 
##     1.4     1.4     1.4     1.4     1.4     1.4     1.4     1.1     1.4     1.8 
##  als_91  als_92 ctrl_93  als_94  als_95  als_96  als_97  als_98  als_99 als_100 
##     1.4     1.6     1.4     1.4     1.4     1.4     0.9     1.8     1.4     1.6 
## als_101 als_102 als_103 
##     1.4     1.4     1.8
round(apply(X = as.data.frame(assay(se_abu_data_filtered)), function(x) sum(is.na(x)), MARGIN = 2) / nrow(as.data.frame(assay(se_abu_data))) * 100 , 1)
##  ctrl_1  ctrl_2   als_3   als_4   als_5   als_6   als_7   als_8  ctrl_9  als_10 
##     0.0     0.0     0.0     0.2     0.2     0.0     0.0     0.5     0.0     0.0 
## ctrl_11 ctrl_12 ctrl_13 ctrl_14 ctrl_15 ctrl_16 ctrl_17 ctrl_18 ctrl_19 ctrl_20 
##     0.0     0.0     0.0     0.0     0.0     0.0     0.2     0.0     0.0     0.0 
## ctrl_21 ctrl_22 ctrl_23 ctrl_24 ctrl_25 ctrl_26 ctrl_27 ctrl_28 ctrl_29 ctrl_30 
##     0.0     0.2     0.0     0.0     0.0     0.2     0.0     0.0     0.0     0.0 
## ctrl_31 ctrl_32 ctrl_33 ctrl_34 ctrl_35 ctrl_36 ctrl_37 ctrl_38 ctrl_39 ctrl_40 
##     0.0     0.0     0.2     0.0     0.2     0.0     0.0     0.0     0.0     0.0 
## ctrl_41 ctrl_42 ctrl_43 ctrl_44 ctrl_45 ctrl_46 ctrl_47 ctrl_48 ctrl_49 ctrl_50 
##     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0     0.0 
## ctrl_51 ctrl_52 ctrl_53 ctrl_54 ctrl_55  als_56  als_57  als_58  als_59 ctrl_60 
##     0.2     0.2     0.2     0.2     0.0     0.0     0.2     0.0     0.0     0.0 
##  als_61  als_62  als_63  als_64  als_65  als_66  als_67  als_68  als_69  als_70 
##     0.0     0.5     0.2     0.2     0.0     0.0     0.0     0.0     0.0     0.0 
## ctrl_71  als_72  als_73  als_74  als_75  als_76  als_77  als_78  als_79  als_80 
##     0.0     0.2     0.2     0.0     0.2     0.0     0.0     0.2     0.0     0.0 
##  als_81 ctrl_82  als_83  als_84  als_85  als_86  als_87  als_88  als_89  als_90 
##     0.0     0.0     0.0     0.2     0.0     0.0     0.0     0.2     0.0     0.5 
##  als_91  als_92 ctrl_93  als_94  als_95  als_96  als_97  als_98  als_99 als_100 
##     0.0     0.2     0.2     0.0     0.0     0.0     0.0     0.5     0.0     0.2 
## als_101 als_102 als_103 
##     0.0     0.0     0.5

Imputation and normalization

#all patients
norm <- normalize_vsn(se_abu_data_filtered)
meanSdPlot(norm)

ggsave("plots/meanSdPlot_norm.pdf", width = 11, height = 8, units = "in")

#only ALS
norm_ALS <- normalize_vsn(se_abu_data_filtered_ALS)
meanSdPlot(norm_ALS)

ggsave("plots/meanSdPlot_norm_ALS.pdf", width = 11, height = 8, units = "in")

  # imputation with several methods: MinProb, MAN, KNN
#all patients
norm_imp_MinProb <- impute(norm, fun = "MinProb", q=0.01)
## [1] 0.1486842
norm_imp_man <- impute(norm, fun = "man", shift = 1.8, scale = 0.3)
norm_imp_knn <- impute(norm, fun = "knn", rowmax = 0.9)

#only ALS
norm_imp_MinProb_ALS <- impute(norm_ALS, fun = "MinProb", q=0.01)
## [1] 0.1351636
norm_imp_man_ALS <- impute(norm_ALS, fun = "man", shift = 1.8, scale = 0.3)
norm_imp_knn_ALS <- impute(norm_ALS, fun = "knn", rowmax = 0.9)


data = list(imp_MinProb = norm_imp_MinProb, imp_man = norm_imp_man, imp_knn= norm_imp_knn,
            imp_MinProb_ALS = norm_imp_MinProb_ALS, imp_man_ALS = norm_imp_man_ALS, imp_knn_ALS = norm_imp_knn_ALS)
saveRDS(data, file = "results/summarized_experiments_imputed.rds")

Differential expression analysis

covariates = c("no_cov", "age_cov", "sex_cov", "age_sex_cov")
covariates_f = c(~0 + condition, ~0 + condition + age_cat, ~0 + condition + sex, ~0 + condition + age_cat + sex)
patients = c("all_patients", "only_female", "only_male")
patients_f = c(NA, "Female", "Male")
res = list()

l = 1
for(k in 1:length(data)){
  for(i in 1:length(covariates)){
    for(j in 1:length(patients)){
    
       title = paste0(names(data)[k],"_",covariates[i], "_", patients[j])
        d = data[[k]]
        if(j >1) { d = d[,d$sex == patients_f[j]] }
        control = "ctrl"
        if(k > 3){control = "bulbar"}
        if(i < 3 | j == 1){
          print(title)
          print(dim(d))
          t = test_diff(d, type = "control", control = control,
            test = NULL, design_formula = formula(covariates_f[[i]]))
          res[[l]] = as.data.frame(t@elementMetadata@listData)
          res[[l]]$fdr = p.adjust(res[[l]][,9], method="BH")
          print(dim(res[[l]]))
          names(res)[l] = title
          write.csv(res[[l]], file = paste0("results/DEx", title, ".csv"))
          l = l+1
          
        }}}}
## [1] "imp_MinProb_no_cov_all_patients"
## [1] 438 103
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_MinProb_no_cov_only_female"
## [1] 438  36
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_MinProb_no_cov_only_male"
## [1] 438  67
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_MinProb_age_cov_all_patients"
## [1] 438 103
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_MinProb_age_cov_only_female"
## [1] 438  36
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_MinProb_age_cov_only_male"
## [1] 438  67
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_MinProb_sex_cov_all_patients"
## [1] 438 103
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_MinProb_age_sex_cov_all_patients"
## [1] 438 103
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_man_no_cov_all_patients"
## [1] 438 103
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_man_no_cov_only_female"
## [1] 438  36
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_man_no_cov_only_male"
## [1] 438  67
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_man_age_cov_all_patients"
## [1] 438 103
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_man_age_cov_only_female"
## [1] 438  36
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_man_age_cov_only_male"
## [1] 438  67
## Tested contrasts: als_vs_ctrl
## Warning in pvt.fit.nullmodel(x, x0, statistic = statistic): Variance of scale
## parameter set to zero due to numerical problems
## [1] 438  10
## [1] "imp_man_sex_cov_all_patients"
## [1] 438 103
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_man_age_sex_cov_all_patients"
## [1] 438 103
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_knn_no_cov_all_patients"
## [1] 438 103
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_knn_no_cov_only_female"
## [1] 438  36
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_knn_no_cov_only_male"
## [1] 438  67
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_knn_age_cov_all_patients"
## [1] 438 103
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_knn_age_cov_only_female"
## [1] 438  36
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_knn_age_cov_only_male"
## [1] 438  67
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_knn_sex_cov_all_patients"
## [1] 438 103
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_knn_age_sex_cov_all_patients"
## [1] 438 103
## Tested contrasts: als_vs_ctrl
## [1] 438  10
## [1] "imp_MinProb_ALS_no_cov_all_patients"
## [1] 438  51
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_MinProb_ALS_no_cov_only_female"
## [1] 438  16
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_MinProb_ALS_no_cov_only_male"
## [1] 438  35
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_MinProb_ALS_age_cov_all_patients"
## [1] 438  51
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_MinProb_ALS_age_cov_only_female"
## [1] 438  16
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_MinProb_ALS_age_cov_only_male"
## [1] 438  35
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_MinProb_ALS_sex_cov_all_patients"
## [1] 438  51
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_MinProb_ALS_age_sex_cov_all_patients"
## [1] 438  51
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_man_ALS_no_cov_all_patients"
## [1] 438  51
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_man_ALS_no_cov_only_female"
## [1] 438  16
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_man_ALS_no_cov_only_male"
## [1] 438  35
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_man_ALS_age_cov_all_patients"
## [1] 438  51
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_man_ALS_age_cov_only_female"
## [1] 438  16
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_man_ALS_age_cov_only_male"
## [1] 438  35
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_man_ALS_sex_cov_all_patients"
## [1] 438  51
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_man_ALS_age_sex_cov_all_patients"
## [1] 438  51
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_knn_ALS_no_cov_all_patients"
## [1] 438  51
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_knn_ALS_no_cov_only_female"
## [1] 438  16
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_knn_ALS_no_cov_only_male"
## [1] 438  35
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_knn_ALS_age_cov_all_patients"
## [1] 438  51
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_knn_ALS_age_cov_only_female"
## [1] 438  16
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_knn_ALS_age_cov_only_male"
## [1] 438  35
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
## [1] "imp_knn_ALS_sex_cov_all_patients"
## [1] 438  51
## Tested contrasts: spinal_vs_bulbar
## Warning in pvt.fit.nullmodel(x, x0, statistic = statistic): Variance of scale
## parameter set to zero due to numerical problems
## [1] 438  10
## [1] "imp_knn_ALS_age_sex_cov_all_patients"
## [1] 438  51
## Tested contrasts: spinal_vs_bulbar
## [1] 438  10
saveRDS(res, file = "results/DEx_results_all_in_list.rds")

Visualization 1: mean expressions per sample

#visualize every dataset, also raw
      data_all = list(raw = se_abu_data, filtered = se_abu_data_filtered, normalized = norm,
                      raw_ALS = se_abu_data_ALS, filtered_ALS = se_abu_data_filtered_ALS, normalized_ALS = norm_ALS)
      data_all = c(data_all, data)
      
      mean_expression_plot = function(data, file_sample, file_mass){
        ggplot(data = reshape2::melt(data), aes(x=Var1, y=value)) +
        geom_boxplot(color="darkseagreen4", fill="darkseagreen3") +
        theme_set(theme_minimal()) +
        theme_few() +
        scale_colour_few() +
        theme(legend.position = "none") +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
        theme(axis.text=element_text(size=6))
      
      ggsave(file_sample, width = 11, height = 8, units = "in")
      
      ggplot(data = reshape2::melt(data), aes(x=reorder(as.factor(Var2),value), y=value)) +
        geom_boxplot(color="darkseagreen4", fill="darkseagreen3") +
        theme_set(theme_minimal()) +
        theme_few() +
        scale_colour_few() +
        theme(legend.position = "none") +
        theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1)) +
        theme(axis.text=element_text(size=6))
      
      ggsave(file_mass, width = 11*2, height = 8, units = "in")
      }
      
      for(i in 1:length(data_all)){
        mean_expression_plot(data = t(assay(data_all[[i]])), 
                            file_sample = paste0("plots/boxplots_expression_each_sample_",
                                                  names(data_all)[i],
                                                  ".pdf"),
                            file_mass = paste0("plots/boxplots_expression_each_mass_",
                                                  names(data_all)[i],
                                                  ".pdf"))
      }
## Warning: Removed 628 rows containing non-finite values (`stat_boxplot()`).
## Removed 628 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 33 rows containing non-finite values (`stat_boxplot()`).
## Removed 33 rows containing non-finite values (`stat_boxplot()`).
## Removed 33 rows containing non-finite values (`stat_boxplot()`).
## Removed 33 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 319 rows containing non-finite values (`stat_boxplot()`).
## Removed 319 rows containing non-finite values (`stat_boxplot()`).
## Warning: Removed 23 rows containing non-finite values (`stat_boxplot()`).
## Removed 23 rows containing non-finite values (`stat_boxplot()`).
## Removed 23 rows containing non-finite values (`stat_boxplot()`).
## Removed 23 rows containing non-finite values (`stat_boxplot()`).
#look at only the important metabolites, and make boxplots of them

#10 most important metabolites according to the model with 
#     - 'man' imputation
#     - stratified on men or women
#     - age covariate

      r = res[["imp_man_age_cov_only_male"]]
      r = r %>% arrange(fdr)
      met_m = r[1:10, "name"]
      
      r = res[["imp_man_age_cov_only_female"]]
      r = r %>% arrange(fdr)
      met_f = r[1:10, "name"]
      
      boxplot_imputations = function(data, 
                                     protein_list,
                                     filepath, 
                                     colours){
          d_MinProb = assay(data[["imp_MinProb"]])[protein_list,]
          d_man = assay(data[["imp_man"]])[protein_list,]
          d_knn = assay(data[["imp_knn"]])[protein_list,]
          d_norm = assay(data[["normalized"]])[protein_list,]
          
          d = as.data.frame(rbind(rep(data_all[["imp_MinProb"]]$condition, 4),
           c(rep("MinProb", ncol(d_MinProb)), rep("man", ncol(d_man)), rep("knn", ncol(d_knn)), rep("norm", ncol(d_norm))),
            cbind(d_MinProb, d_man, d_knn, d_norm)))
          rownames(d)[1:2] = c("condition", "imputation")
          colnames(d) = make.unique(colnames(d))
          d_melt = as.data.frame(t(reshape2::melt(d)))
          d_melt[,3:ncol(d_melt)] = apply(d_melt[,3:ncol(d_melt)], FUN = as.numeric, MARGIN = 2)
          long <- melt(setDT(d_melt), id.vars = c("condition","imputation"), variable.name = "protein")
          
          p = ggplot(long, aes(x=condition, y=value, fill=imputation)) + 
              geom_boxplot() +
              theme_few() +
              scale_colour_few() +
              scale_fill_manual(values=colours) +
              facet_wrap(~protein, ncol = 5)
          p
          
          ggsave(file = filepath, width = 11*2, height = 8*1.5, units = "in")
      }
      
      boxplot_imputations(data = data_all, 
                        protein_list = met_m,
                        filepath = "plots/boxplot_comp_imp_top10_mets_male.pdf",
                        colours = c("lightblue", "#69b3a2","lightblue4", "grey")
                        )
## Using ctrl_1, ctrl_2, als_3, als_4, als_5, als_6, als_7, als_8, ctrl_9, als_10, ctrl_11, ctrl_12, ctrl_13, ctrl_14, ctrl_15, ctrl_16, ctrl_17, ctrl_18, ctrl_19, ctrl_20, ctrl_21, ctrl_22, ctrl_23, ctrl_24, ctrl_25, ctrl_26, ctrl_27, ctrl_28, ctrl_29, ctrl_30, ctrl_31, ctrl_32, ctrl_33, ctrl_34, ctrl_35, ctrl_36, ctrl_37, ctrl_38, ctrl_39, ctrl_40, ctrl_41, ctrl_42, ctrl_43, ctrl_44, ctrl_45, ctrl_46, ctrl_47, ctrl_48, ctrl_49, ctrl_50, ctrl_51, ctrl_52, ctrl_53, ctrl_54, ctrl_55, als_56, als_57, als_58, als_59, ctrl_60, als_61, als_62, als_63, als_64, als_65, als_66, als_67, als_68, als_69, als_70, ctrl_71, als_72, als_73, als_74, als_75, als_76, als_77, als_78, als_79, als_80, als_81, ctrl_82, als_83, als_84, als_85, als_86, als_87, als_88, als_89, als_90, als_91, als_92, ctrl_93, als_94, als_95, als_96, als_97, als_98, als_99, als_100, als_101, als_102, als_103, ctrl_1.1, ctrl_2.1, als_3.1, als_4.1, als_5.1, als_6.1, als_7.1, als_8.1, ctrl_9.1, als_10.1, ctrl_11.1, ctrl_12.1, ctrl_13.1, ctrl_14.1, ctrl_15.1, ctrl_16.1, ctrl_17.1, ctrl_18.1, ctrl_19.1, ctrl_20.1, ctrl_21.1, ctrl_22.1, ctrl_23.1, ctrl_24.1, ctrl_25.1, ctrl_26.1, ctrl_27.1, ctrl_28.1, ctrl_29.1, ctrl_30.1, ctrl_31.1, ctrl_32.1, ctrl_33.1, ctrl_34.1, ctrl_35.1, ctrl_36.1, ctrl_37.1, ctrl_38.1, ctrl_39.1, ctrl_40.1, ctrl_41.1, ctrl_42.1, ctrl_43.1, ctrl_44.1, ctrl_45.1, ctrl_46.1, ctrl_47.1, ctrl_48.1, ctrl_49.1, ctrl_50.1, ctrl_51.1, ctrl_52.1, ctrl_53.1, ctrl_54.1, ctrl_55.1, als_56.1, als_57.1, als_58.1, als_59.1, ctrl_60.1, als_61.1, als_62.1, als_63.1, als_64.1, als_65.1, als_66.1, als_67.1, als_68.1, als_69.1, als_70.1, ctrl_71.1, als_72.1, als_73.1, als_74.1, als_75.1, als_76.1, als_77.1, als_78.1, als_79.1, als_80.1, als_81.1, ctrl_82.1, als_83.1, als_84.1, als_85.1, als_86.1, als_87.1, als_88.1, als_89.1, als_90.1, als_91.1, als_92.1, ctrl_93.1, als_94.1, als_95.1, als_96.1, als_97.1, als_98.1, als_99.1, als_100.1, als_101.1, als_102.1, als_103.1, ctrl_1.2, ctrl_2.2, als_3.2, als_4.2, als_5.2, als_6.2, als_7.2, als_8.2, ctrl_9.2, als_10.2, ctrl_11.2, ctrl_12.2, ctrl_13.2, ctrl_14.2, ctrl_15.2, ctrl_16.2, ctrl_17.2, ctrl_18.2, ctrl_19.2, ctrl_20.2, ctrl_21.2, ctrl_22.2, ctrl_23.2, ctrl_24.2, ctrl_25.2, ctrl_26.2, ctrl_27.2, ctrl_28.2, ctrl_29.2, ctrl_30.2, ctrl_31.2, ctrl_32.2, ctrl_33.2, ctrl_34.2, ctrl_35.2, ctrl_36.2, ctrl_37.2, ctrl_38.2, ctrl_39.2, ctrl_40.2, ctrl_41.2, ctrl_42.2, ctrl_43.2, ctrl_44.2, ctrl_45.2, ctrl_46.2, ctrl_47.2, ctrl_48.2, ctrl_49.2, ctrl_50.2, ctrl_51.2, ctrl_52.2, ctrl_53.2, ctrl_54.2, ctrl_55.2, als_56.2, als_57.2, als_58.2, als_59.2, ctrl_60.2, als_61.2, als_62.2, als_63.2, als_64.2, als_65.2, als_66.2, als_67.2, als_68.2, als_69.2, als_70.2, ctrl_71.2, als_72.2, als_73.2, als_74.2, als_75.2, als_76.2, als_77.2, als_78.2, als_79.2, als_80.2, als_81.2, ctrl_82.2, als_83.2, als_84.2, als_85.2, als_86.2, als_87.2, als_88.2, als_89.2, als_90.2, als_91.2, als_92.2, ctrl_93.2, als_94.2, als_95.2, als_96.2, als_97.2, als_98.2, als_99.2, als_100.2, als_101.2, als_102.2, als_103.2, ctrl_1.3, ctrl_2.3, als_3.3, als_4.3, als_5.3, als_6.3, als_7.3, als_8.3, ctrl_9.3, als_10.3, ctrl_11.3, ctrl_12.3, ctrl_13.3, ctrl_14.3, ctrl_15.3, ctrl_16.3, ctrl_17.3, ctrl_18.3, ctrl_19.3, ctrl_20.3, ctrl_21.3, ctrl_22.3, ctrl_23.3, ctrl_24.3, ctrl_25.3, ctrl_26.3, ctrl_27.3, ctrl_28.3, ctrl_29.3, ctrl_30.3, ctrl_31.3, ctrl_32.3, ctrl_33.3, ctrl_34.3, ctrl_35.3, ctrl_36.3, ctrl_37.3, ctrl_38.3, ctrl_39.3, ctrl_40.3, ctrl_41.3, ctrl_42.3, ctrl_43.3, ctrl_44.3, ctrl_45.3, ctrl_46.3, ctrl_47.3, ctrl_48.3, ctrl_49.3, ctrl_50.3, ctrl_51.3, ctrl_52.3, ctrl_53.3, ctrl_54.3, ctrl_55.3, als_56.3, als_57.3, als_58.3, als_59.3, ctrl_60.3, als_61.3, als_62.3, als_63.3, als_64.3, als_65.3, als_66.3, als_67.3, als_68.3, als_69.3, als_70.3, ctrl_71.3, als_72.3, als_73.3, als_74.3, als_75.3, als_76.3, als_77.3, als_78.3, als_79.3, als_80.3, als_81.3, ctrl_82.3, als_83.3, als_84.3, als_85.3, als_86.3, als_87.3, als_88.3, als_89.3, als_90.3, als_91.3, als_92.3, ctrl_93.3, als_94.3, als_95.3, als_96.3, als_97.3, als_98.3, als_99.3, als_100.3, als_101.3, als_102.3, als_103.3 as id variables
      boxplot_imputations(data = data_all, 
                        protein_list = met_f,
                        filepath = "plots/boxplot_comp_imp_top10_mets_female.pdf",
                        colours = c("lightpink","salmon", "salmon4", "grey")
                        )
## Using ctrl_1, ctrl_2, als_3, als_4, als_5, als_6, als_7, als_8, ctrl_9, als_10, ctrl_11, ctrl_12, ctrl_13, ctrl_14, ctrl_15, ctrl_16, ctrl_17, ctrl_18, ctrl_19, ctrl_20, ctrl_21, ctrl_22, ctrl_23, ctrl_24, ctrl_25, ctrl_26, ctrl_27, ctrl_28, ctrl_29, ctrl_30, ctrl_31, ctrl_32, ctrl_33, ctrl_34, ctrl_35, ctrl_36, ctrl_37, ctrl_38, ctrl_39, ctrl_40, ctrl_41, ctrl_42, ctrl_43, ctrl_44, ctrl_45, ctrl_46, ctrl_47, ctrl_48, ctrl_49, ctrl_50, ctrl_51, ctrl_52, ctrl_53, ctrl_54, ctrl_55, als_56, als_57, als_58, als_59, ctrl_60, als_61, als_62, als_63, als_64, als_65, als_66, als_67, als_68, als_69, als_70, ctrl_71, als_72, als_73, als_74, als_75, als_76, als_77, als_78, als_79, als_80, als_81, ctrl_82, als_83, als_84, als_85, als_86, als_87, als_88, als_89, als_90, als_91, als_92, ctrl_93, als_94, als_95, als_96, als_97, als_98, als_99, als_100, als_101, als_102, als_103, ctrl_1.1, ctrl_2.1, als_3.1, als_4.1, als_5.1, als_6.1, als_7.1, als_8.1, ctrl_9.1, als_10.1, ctrl_11.1, ctrl_12.1, ctrl_13.1, ctrl_14.1, ctrl_15.1, ctrl_16.1, ctrl_17.1, ctrl_18.1, ctrl_19.1, ctrl_20.1, ctrl_21.1, ctrl_22.1, ctrl_23.1, ctrl_24.1, ctrl_25.1, ctrl_26.1, ctrl_27.1, ctrl_28.1, ctrl_29.1, ctrl_30.1, ctrl_31.1, ctrl_32.1, ctrl_33.1, ctrl_34.1, ctrl_35.1, ctrl_36.1, ctrl_37.1, ctrl_38.1, ctrl_39.1, ctrl_40.1, ctrl_41.1, ctrl_42.1, ctrl_43.1, ctrl_44.1, ctrl_45.1, ctrl_46.1, ctrl_47.1, ctrl_48.1, ctrl_49.1, ctrl_50.1, ctrl_51.1, ctrl_52.1, ctrl_53.1, ctrl_54.1, ctrl_55.1, als_56.1, als_57.1, als_58.1, als_59.1, ctrl_60.1, als_61.1, als_62.1, als_63.1, als_64.1, als_65.1, als_66.1, als_67.1, als_68.1, als_69.1, als_70.1, ctrl_71.1, als_72.1, als_73.1, als_74.1, als_75.1, als_76.1, als_77.1, als_78.1, als_79.1, als_80.1, als_81.1, ctrl_82.1, als_83.1, als_84.1, als_85.1, als_86.1, als_87.1, als_88.1, als_89.1, als_90.1, als_91.1, als_92.1, ctrl_93.1, als_94.1, als_95.1, als_96.1, als_97.1, als_98.1, als_99.1, als_100.1, als_101.1, als_102.1, als_103.1, ctrl_1.2, ctrl_2.2, als_3.2, als_4.2, als_5.2, als_6.2, als_7.2, als_8.2, ctrl_9.2, als_10.2, ctrl_11.2, ctrl_12.2, ctrl_13.2, ctrl_14.2, ctrl_15.2, ctrl_16.2, ctrl_17.2, ctrl_18.2, ctrl_19.2, ctrl_20.2, ctrl_21.2, ctrl_22.2, ctrl_23.2, ctrl_24.2, ctrl_25.2, ctrl_26.2, ctrl_27.2, ctrl_28.2, ctrl_29.2, ctrl_30.2, ctrl_31.2, ctrl_32.2, ctrl_33.2, ctrl_34.2, ctrl_35.2, ctrl_36.2, ctrl_37.2, ctrl_38.2, ctrl_39.2, ctrl_40.2, ctrl_41.2, ctrl_42.2, ctrl_43.2, ctrl_44.2, ctrl_45.2, ctrl_46.2, ctrl_47.2, ctrl_48.2, ctrl_49.2, ctrl_50.2, ctrl_51.2, ctrl_52.2, ctrl_53.2, ctrl_54.2, ctrl_55.2, als_56.2, als_57.2, als_58.2, als_59.2, ctrl_60.2, als_61.2, als_62.2, als_63.2, als_64.2, als_65.2, als_66.2, als_67.2, als_68.2, als_69.2, als_70.2, ctrl_71.2, als_72.2, als_73.2, als_74.2, als_75.2, als_76.2, als_77.2, als_78.2, als_79.2, als_80.2, als_81.2, ctrl_82.2, als_83.2, als_84.2, als_85.2, als_86.2, als_87.2, als_88.2, als_89.2, als_90.2, als_91.2, als_92.2, ctrl_93.2, als_94.2, als_95.2, als_96.2, als_97.2, als_98.2, als_99.2, als_100.2, als_101.2, als_102.2, als_103.2, ctrl_1.3, ctrl_2.3, als_3.3, als_4.3, als_5.3, als_6.3, als_7.3, als_8.3, ctrl_9.3, als_10.3, ctrl_11.3, ctrl_12.3, ctrl_13.3, ctrl_14.3, ctrl_15.3, ctrl_16.3, ctrl_17.3, ctrl_18.3, ctrl_19.3, ctrl_20.3, ctrl_21.3, ctrl_22.3, ctrl_23.3, ctrl_24.3, ctrl_25.3, ctrl_26.3, ctrl_27.3, ctrl_28.3, ctrl_29.3, ctrl_30.3, ctrl_31.3, ctrl_32.3, ctrl_33.3, ctrl_34.3, ctrl_35.3, ctrl_36.3, ctrl_37.3, ctrl_38.3, ctrl_39.3, ctrl_40.3, ctrl_41.3, ctrl_42.3, ctrl_43.3, ctrl_44.3, ctrl_45.3, ctrl_46.3, ctrl_47.3, ctrl_48.3, ctrl_49.3, ctrl_50.3, ctrl_51.3, ctrl_52.3, ctrl_53.3, ctrl_54.3, ctrl_55.3, als_56.3, als_57.3, als_58.3, als_59.3, ctrl_60.3, als_61.3, als_62.3, als_63.3, als_64.3, als_65.3, als_66.3, als_67.3, als_68.3, als_69.3, als_70.3, ctrl_71.3, als_72.3, als_73.3, als_74.3, als_75.3, als_76.3, als_77.3, als_78.3, als_79.3, als_80.3, als_81.3, ctrl_82.3, als_83.3, als_84.3, als_85.3, als_86.3, als_87.3, als_88.3, als_89.3, als_90.3, als_91.3, als_92.3, ctrl_93.3, als_94.3, als_95.3, als_96.3, als_97.3, als_98.3, als_99.3, als_100.3, als_101.3, als_102.3, als_103.3 as id variables

Visualization 1b: Density plot

#density plots of clinical variables
d = as.data.frame(cbind(clin[,c("sex", "age_cat", "disease")], as.data.frame(t(assay(data_all[["normalized"]])))))
long <- melt(setDT(d), id.vars = c("sex", "age_cat", "disease"), variable.name = "metabolite")

#density plots for variables with and without missing
d = as.data.frame(assay(data_all[["normalized"]]))
missing = apply(d, function(x) sum(is.na(x)) , MARGIN = 1)
missing[missing>0] = "yes"
missing[missing == 0] = "no"
missing = as.factor(missing)
d2 = cbind(missing,d)
long2 <- melt(setDT(d2), id.vars = "missing", variable.name = "metabolite")

a = ggplot(long, aes(x=value, color=sex)) +
  geom_density() +
  theme_few() +
  scale_colour_few()
b = ggplot(long, aes(x=value, color=age_cat)) +
  geom_density() +
  theme_few() +
  scale_colour_few()
c = ggplot(long, aes(x=value, color=disease)) +
  geom_density() +
  theme_few() +
  scale_colour_few()
d = ggplot(long2, aes(x=value, color=missing)) +
  geom_density() +
  theme_few() +
  scale_colour_few()

library(ggpubr)
## 
## Attaching package: 'ggpubr'
## The following object is masked from 'package:cowplot':
## 
##     get_legend
ggarrange(a,b,c,d, ncol = 2, nrow = 2)
## Warning: Removed 33 rows containing non-finite values (`stat_density()`).
## Removed 33 rows containing non-finite values (`stat_density()`).
## Removed 33 rows containing non-finite values (`stat_density()`).
## Removed 33 rows containing non-finite values (`stat_density()`).

ggsave(file = "plots/density.pdf", width = 11, height = 8, units = "in")

Visualization 2: heatmaps

#functions for saving the heatmaps as figures
        
        save_pheatmap_pdf <- function(x, filename, width=11/2, height=8/2) {
           stopifnot(!missing(x))
           stopifnot(!missing(filename))
           pdf(filename, width=width, height=height)
           grid::grid.newpage()
           grid::grid.draw(x$gtable)
           dev.off()
        }
        
        make_pheatmap <- function(data, cluster_cols = T, main = "Heatmap", clustering_method = "ward.D"){
          p = pheatmap::pheatmap(data, name = "expression", cutree_cols = 1,
                  show_colnames = T,
                  show_rownames = FALSE,
                  fontsize = 6,
                  fontsize_col = 3,
                  annotation_col = annotation,
                  annotation_colors = annotation_colours,
                  color = viridis::viridis(100, option="G", direction = -1,),
                  main = main,
                  border_color=NA,
                  cluster_cols = cluster_cols,
                  clustering_method = clustering_method,
                  na_col = "grey50")
          return(p)
        }
        
# all clustering methods:
        method = c("ward.D", "ward.D2", "single", "complete", "average" , "mcquitty", "median", "centroid") #see hclust() for meaning of each method

# loop for all datasets and all methods          
        for(i in 1:length(data)){
          for(j in 1:length(method)){
        title = paste0(names(data)[i], "_", method[j])  
        print(title)
      
        # get annotations and dataframe ready
        #annotations
        if(i<=3){
        annotation = data.frame(group = as.factor(data[[i]]$condition), 
                                sex = as.factor(data[[i]]$sex), 
                                age = data[[i]]$age, 
                                onset = as.factor(data[[i]]$onset), 
                                neurofilaments = data[[i]]$neurofilaments,
                                genetics = data[[i]]$genetics, 
                                progression_rate = data[[i]]$progression_rate)
        rownames(annotation) = data[[i]]@colData$ID
        annotation_colours <- list(
          group = c(ctrl = "darkseagreen3", als = "darksalmon"), 
          sex = c(Female = "lightpink", Male ="lightblue3"), 
          age = c("white", "darkgreen"), 
          onset = c(ctrl = "grey50", spinal = "mediumpurple1", bulbar = "mediumaquamarine"),
          neurofilaments = c("white", "royalblue"),
          genetics = c(not_performed = "grey80", C9orf72 = "aquamarine4", negative = "salmon"),
          progression_rate = c("yellow", "red"))
        }
                if(i>3){
        annotation = data.frame(group = as.factor(data[[i]]$condition), 
                                sex = as.factor(data[[i]]$sex), 
                                age_at_onset = data[[i]]$age_at_onset, 
                                neurofilaments = data[[i]]$neurofilaments,
                                genetics = data[[i]]$genetics, 
                                progression_rate = data[[i]]$progression_rate)
        rownames(annotation) = data[[i]]@colData$ID
        annotation_colours <- list(
          group = c(spinal = "mediumpurple1", bulbar = "mediumaquamarine"),
          sex = c(Female = "lightpink", Male ="lightblue3"), 
          age_at_onset = c("white", "darkgreen"),
          neurofilaments = c("white", "royalblue"),
          genetics = c(not_performed = "grey80", C9orf72 = "aquamarine4", negative = "salmon"),
          progression_rate = c("yellow", "red"))
        }

#create heatmaps with all patients
        
        #without grouping, all proteins
        p = make_pheatmap(data = assay(data[[i]]), cluster_cols = T, main = paste0("Heatmap all metabolites\n",title, "\nclustered"), clustering_method = method[j])
        save_pheatmap_pdf(p, filename = paste0("plots/heatmap_clustered_",title,".pdf"))
        
        # without grouping, 100 most variable proteins
        d = assay(data[[i]])
        d2 = head(order(rowVars(d),decreasing = T),100)
        p = make_pheatmap(data = d[d2,], cluster_cols = T, main = paste0("Heatmap 100 most variable metabolites\n",title, "\nclustered"), clustering_method = method[j])
        save_pheatmap_pdf(p, filename = paste0("plots/heatmap_clustered_mostvar_",title,".pdf"))
        
        #heatmap with only significant genes
        r = res[[grep(names(data)[i], names(res))[1]]]
        sig_met = r$name[r$fdr<0.05]
            if(length(sig_met)>2){
              d = d[sig_met,]
              p = make_pheatmap(data = d, cluster_cols = T, main = paste0("Heatmap only significant metabolites\n",title, "\nclustered"), clustering_method = method[j])
              save_pheatmap_pdf(p, paste0("plots/heatmap_clustered_only_sig_",title,".pdf"))
            }}}
## [1] "imp_MinProb_ward.D"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_MinProb_ward.D2"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_MinProb_single"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_MinProb_complete"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_MinProb_average"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_MinProb_mcquitty"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_MinProb_median"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_MinProb_centroid"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_man_ward.D"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_man_ward.D2"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_man_single"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_man_complete"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_man_average"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_man_mcquitty"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_man_median"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_man_centroid"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_knn_ward.D"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_knn_ward.D2"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_knn_single"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_knn_complete"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_knn_average"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_knn_mcquitty"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_knn_median"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_knn_centroid"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_MinProb_ALS_ward.D"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_MinProb_ALS_ward.D2"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_MinProb_ALS_single"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_MinProb_ALS_complete"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_MinProb_ALS_average"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_MinProb_ALS_mcquitty"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_MinProb_ALS_median"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_MinProb_ALS_centroid"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_man_ALS_ward.D"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_man_ALS_ward.D2"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_man_ALS_single"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_man_ALS_complete"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_man_ALS_average"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_man_ALS_mcquitty"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_man_ALS_median"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_man_ALS_centroid"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_knn_ALS_ward.D"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_knn_ALS_ward.D2"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_knn_ALS_single"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_knn_ALS_complete"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_knn_ALS_average"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_knn_ALS_mcquitty"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_knn_ALS_median"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

## [1] "imp_knn_ALS_centroid"
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.

Visualization 3: UMAP plots

# set seed for reproducible results
set.seed(9)
          group = c("darksalmon","darkseagreen3")
          sex = c("lightpink", "lightblue3")
          onset = c("mediumaquamarine", "mediumpurple1","grey80")
          age_cat = c("darkgreen", "lightgreen")


UMAP_density_plot = function(data, 
                             ggtitle = "UMAP with disease status labels", 
                             legend_name = "Disease status", 
                             labels = clin$Condition, 
                             file_location = "plots/UMAP_condition.pdf", 
                             file_location_labels = "plots/UMAP_condition_labels.pdf",
                             colour_set = c("seagreen4", "slateblue1", "salmon")){
      # run umap function
      umap_out = umap::umap(data)
      umap_plot = as.data.frame(umap_out$layout)
      
      #add condition labels
      umap_plot$group = labels

      # plot umap
      p1 = ggplot(umap_plot) + geom_point(aes(x=V1, y=V2, color = as.factor(group))) +
        ggtitle(ggtitle) +
          theme_few() +
          scale_colour_few() +
          scale_color_manual(name = legend_name, 
                           labels = levels(as.factor(umap_plot$group)), 
                           values = colour_set) 
  
      xdens <- 
        axis_canvas(p1, axis = "x") + 
        geom_density(data = umap_plot, aes(x = V1, fill = group, colour = group), alpha = 0.3) +
        scale_fill_manual( values = colour_set) + 
        scale_colour_manual( values = colour_set)
      ydens <-
        axis_canvas(p1, axis = "y", coord_flip = TRUE) + 
        geom_density(data = umap_plot, aes(x = V2, fill = group, colour = group), alpha = 0.3) +
        coord_flip() +
        scale_fill_manual(values = colour_set) + 
        scale_colour_manual( values = colour_set)
      p1 %>%
        insert_xaxis_grob(xdens, grid::unit(1, "in"), position = "top") %>%
        insert_yaxis_grob(ydens, grid::unit(1, "in"), position = "right") %>%
        ggdraw()
      
      p1
      # save umap
      ggsave(file_location, width = 11/2, height = 8/2, units = "in")
      
            
      p1 + geom_text(label = rownames(umap_plot), x = umap_plot$V1, y = umap_plot$V2,
                     hjust = 0, nudge_x = 1, size = 1.5, colour = "grey")
      
      # save umap with labels
      ggsave(file_location_labels, width = 11/2, height = 8/2, units = "in")
}

for(i in 1:3){
  d = t(assay(data[[i]]))
  labels_disease = data[[i]]$condition
  labels_onset = data[[i]]$onset
  labels_sex = data[[i]]$sex
  labels_age = data[[i]]$age_cat
  title = names(data)[i]
        
#perform plots with function      
        UMAP_density_plot(data = d, 
                          ggtitle = paste0("UMAP with disease status labels\n", title), 
                          legend_name = "Disease status", 
                          labels = labels_disease, 
                          file_location = paste0("plots/UMAP_condition_",title,".pdf"),
                          file_location_labels = paste0("plots/UMAP_condition_labels_",title,".pdf"),
                          colour_set = group)
        
        UMAP_density_plot(data = d, 
                          ggtitle = paste0("UMAP with onset status labels\n", title), 
                          legend_name = "Onset labels", 
                          labels = labels_onset, 
                          file_location = paste0("plots/UMAP_onset_",title,".pdf"), 
                          file_location_labels = paste0("plots/UMAP_onset_labels_",title,".pdf"),
                          colour_set = onset)

        UMAP_density_plot(data = d, 
                          ggtitle = paste0("UMAP with sex labels\n", title), 
                          legend_name = "Sex label", 
                          labels = labels_sex, 
                          file_location = paste0("plots/UMAP_sex_",title,".pdf"), 
                          file_location_labels = paste0("plots/UMAP_sex_labels_",title,".pdf"), 
                          colour_set = sex)
        
        UMAP_density_plot(data = d, 
                          ggtitle = paste0("UMAP with age labels\n", title), 
                          legend_name = "Age label", 
                          labels = labels_age, 
                          file_location = paste0("plots/UMAP_age_",title,".pdf"), 
                          file_location_labels = paste0("plots/UMAP_age_labels_",title,".pdf"), 
                          colour_set = age_cat)
        
#perform plots with only most variable proteins      
        d2 = head(order(colVars(d),decreasing = T),100)
        
        UMAP_density_plot(data = d[,d2], 
                          ggtitle = paste0("UMAP with disease status labels\n", title, "\n with 100 most variable metabolites"), 
                          legend_name = "Disease status", 
                          labels = labels_disease, 
                          file_location = paste0("plots/UMAP_mostvar_condition_",title,".pdf"),
                          file_location_labels = paste0("plots/UMAP_mostvar_condition_labels_",title,".pdf"),
                          colour_set = group)
        
        UMAP_density_plot(data = d[,d2], 
                          ggtitle = paste0("UMAP with onset status labels\n", title, "\n with 100 most variable metabolites"), 
                          legend_name = "Onset labels", 
                          labels = labels_onset, 
                          file_location = paste0("plots/UMAP_mostvar_onset_",title,".pdf"), 
                          file_location_labels = paste0("plots/UMAP_mostvar_onset_labels_",title,".pdf"),
                          colour_set = onset)

        UMAP_density_plot(data = d[,d2], 
                          ggtitle = paste0("UMAP with sex labels", title, "\n with 100 most variable metabolites"), 
                          legend_name = "Sex label", 
                          labels = labels_sex, 
                          file_location = paste0("plots/UMAP_mostvar_sex_",title,".pdf"), 
                          file_location_labels = paste0("plots/UMAP_mostvar_sex_labels_",title,".pdf"), 
                          colour_set = sex)
        
        UMAP_density_plot(data = d[,d2], 
                          ggtitle = paste0("UMAP with age labels\n", title, "\n with 100 most variable metabolites"), 
                          legend_name = "Age label", 
                          labels = labels_age, 
                          file_location = paste0("plots/UMAP_mostvar_age_",title,".pdf"), 
                          file_location_labels = paste0("plots/UMAP_mostvar_age_labels_",title,".pdf"), 
                          colour_set = age_cat)
}
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.

Visualization 4: volcano plots

volcano_plot <- function(data_res, alpha_sig, name_title){
  logFC = data_res[,grep("diff",colnames(data_res))]
  fdr = data_res$fdr
  df <- data.frame(x = logFC, 
                   y = -log10(fdr),
                   name = rownames(data_res)) %>%
    mutate(name = lapply(strsplit(name,"\\|"), function(x) x[1]))
  names(df) <- c("x","y","name")
  df <- df %>%
    mutate(omic_type = case_when(x >= 0 & y >= (-log10(alpha_sig)) ~ "up",
                                 x <= (0) & y >= (-log10(alpha_sig)) ~ "down",
                                 TRUE ~ "ns")) 
  cols <- c("up" = "#d4552b", "down" = "#26b3ff", "ns" = "grey") 
  sizes <- c("up" = 2, "down" = 2, "ns" = 1) 
  alphas <- c("up" = 0.7, "down" = 0.7, "ns" = 0.5)
  ggplot(data = df, aes(x,y)) + 
    geom_point(aes(colour = omic_type), 
               alpha = 0.5, 
               shape = 16,
               size = 3) + 
    geom_hline(yintercept = -log10(alpha_sig),
               linetype = "dashed") + 
    geom_vline(xintercept = 0,linetype = "dashed") +
    geom_point(data = filter(df, y >= (-log10(alpha_sig))),
               aes(colour = omic_type), 
               alpha = 0.5, 
               shape = 16,
               size = 4) + 
    #annotate(geom="text", x=-1.9, y= (-log10(alpha_sig)) + 0.15, label="FDR = 10%",size = 5) +
    geom_text_repel(data = filter(df, y >= (-log10(alpha_sig)) & y > 0),
                     aes(label = name),
                     force = 1,
                    hjust = 1,
                     nudge_x = - 0.3,
                    nudge_y = 0.1,
                    #direction = "x",
                     max.overlaps = 5,
                    segment.size = 0.2,
                     size = 4) +
    geom_text_repel(data = filter(df, y >= (-log10(alpha_sig)) & y < 0),
                    aes(label = name),
                    force = 1,
                    hjust = 0,
                    nudge_x = 0.3,
                    nudge_y = 0.1,
                    #direction = "y",
                    max.overlaps = 5,
                    size = 4) +
    scale_colour_manual(values = cols) + 
    scale_fill_manual(values = cols) + 
    scale_x_continuous(expand = c(0, 0), 
                       limits = c(-1.5, 1.5)) + 
    scale_y_continuous(expand = c(0, 0), limits = c(-0.1, NA)) +
    labs(title = name_title,
         x = "log2(fold change)",
         y = expression(-log[10] ~ "(adjusted p-value)"),
         colour = "Differential \nExpression") +
    theme_classic() + # Select theme with a white background  
    theme(axis.title.y = element_text(size = 14),
          axis.title.x = element_text(size = 14),
          axis.text = element_text(size = 12),
          plot.title = element_text(size = 15, hjust = 0.5),
          text = element_text(size = 14)) +
    annotate("text", x = 0.5, y = 0.5, label = paste0(sum(df$omic_type=="up"), " more abundant \n", sum(df$omic_type=="down"), " less abundant"))
}


for(i in 1:length(res)){
volcano_plot(res[[i]], 0.05 , paste0("Volcano plot metabolomics \nalpha = FDR 0.05\n", names(res)[i]))
ggsave(paste0("plots/volcano_plot_", names(res)[i], "_FDR0.05.pdf"), 
                 width = 11, height = 8, units = "in")
volcano_plot(res[[i]], 0.1 , paste0("Volcano plot metabolomics \nalpha = FDR 0.1\n", names(res)[i]))
ggsave(paste0("plots/volcano_plot_", names(res)[i], "_FDR0.1.pdf"), 
                 width = 11, height = 8, units = "in")
}
## Warning: ggrepel: 138 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 192 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 111 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 192 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 143 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 195 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 118 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 193 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 147 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 197 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 153 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 198 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 135 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 189 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 111 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 191 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 141 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 194 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 118 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 192 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 144 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 196 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 150 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 196 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 139 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 193 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 114 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 194 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 140 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 192 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 119 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 193 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 147 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 197 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 152 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps
## Warning: ggrepel: 197 unlabeled data points (too many overlaps). Consider
## increasing max.overlaps

R and packages versions

sessionInfo()
## R version 4.2.3 (2023-03-15)
## Platform: aarch64-apple-darwin20 (64-bit)
## Running under: macOS Ventura 13.3.1
## 
## Matrix products: default
## BLAS:   /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/4.2-arm64/Resources/lib/libRlapack.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats4    stats     graphics  grDevices utils     datasets  methods  
## [8] base     
## 
## other attached packages:
##  [1] ggpubr_0.6.0                data.table_1.14.8          
##  [3] SummarizedExperiment_1.28.0 GenomicRanges_1.50.2       
##  [5] GenomeInfoDb_1.34.9         IRanges_2.32.0             
##  [7] S4Vectors_0.36.2            MatrixGenerics_1.10.0      
##  [9] matrixStats_1.0.0           naniar_1.0.0               
## [11] readr_2.1.4                 DEP_1.20.0                 
## [13] vsn_3.66.0                  Biobase_2.58.0             
## [15] BiocGenerics_0.44.0         cowplot_1.1.1              
## [17] ggthemes_4.2.4              umap_0.2.10.0              
## [19] reshape2_1.4.4              ggrepel_0.9.3              
## [21] dplyr_1.1.2                 dichromat_2.0-0.1          
## [23] ggplot2_3.4.3               pheatmap_1.0.12            
## 
## loaded via a namespace (and not attached):
##   [1] backports_1.4.1        circlize_0.4.15        systemfonts_1.0.4     
##   [4] plyr_1.8.8             gmm_1.8                shinydashboard_0.7.2  
##   [7] BiocParallel_1.32.6    digest_0.6.33          foreach_1.5.2         
##  [10] htmltools_0.5.6        viridis_0.6.4          fansi_1.0.4           
##  [13] magrittr_2.0.3         cluster_2.1.4          doParallel_1.0.17     
##  [16] tzdb_0.4.0             limma_3.54.2           ComplexHeatmap_2.14.0 
##  [19] vroom_1.6.3            imputeLCMD_2.1         sandwich_3.0-2        
##  [22] askpass_1.1            colorspace_2.1-0       textshaping_0.3.6     
##  [25] xfun_0.40              hexbin_1.28.3          crayon_1.5.2          
##  [28] RCurl_1.98-1.12        jsonlite_1.8.7         impute_1.72.3         
##  [31] zoo_1.8-12             iterators_1.0.14       glue_1.6.2            
##  [34] gtable_0.3.4           zlibbioc_1.44.0        XVector_0.38.0        
##  [37] GetoptLong_1.0.5       DelayedArray_0.24.0    car_3.1-2             
##  [40] shape_1.4.6            abind_1.4-5            scales_1.2.1          
##  [43] mvtnorm_1.2-3          DBI_1.1.3              rstatix_0.7.2         
##  [46] Rcpp_1.0.11            mzR_2.32.0             viridisLite_0.4.2     
##  [49] xtable_1.8-4           clue_0.3-64            reticulate_1.31       
##  [52] bit_4.0.5              preprocessCore_1.60.2  MsCoreUtils_1.10.0    
##  [55] DT_0.29                htmlwidgets_1.6.2      RColorBrewer_1.1-3    
##  [58] ellipsis_0.3.2         pkgconfig_2.0.3        XML_3.99-0.14         
##  [61] farver_2.1.1           sass_0.4.7             utf8_1.2.3            
##  [64] tidyselect_1.2.0       labeling_0.4.3         rlang_1.1.1           
##  [67] later_1.3.1            munsell_0.5.0          tools_4.2.3           
##  [70] cachem_1.0.8           cli_3.6.1              generics_0.1.3        
##  [73] broom_1.0.5            fdrtool_1.2.17         evaluate_0.21         
##  [76] stringr_1.5.0          fastmap_1.1.1          mzID_1.36.0           
##  [79] yaml_2.3.7             ragg_1.2.5             knitr_1.43            
##  [82] bit64_4.0.5            purrr_1.0.2            ncdf4_1.21            
##  [85] visdat_0.6.0           mime_0.12              compiler_4.2.3        
##  [88] rstudioapi_0.15.0      png_0.1-8              ggsignif_0.6.4        
##  [91] affyio_1.68.0          tibble_3.2.1           bslib_0.5.1           
##  [94] stringi_1.7.12         highr_0.10             RSpectra_0.16-1       
##  [97] MSnbase_2.24.2         lattice_0.21-8         ProtGenerics_1.30.0   
## [100] Matrix_1.6-1           tmvtnorm_1.5           vctrs_0.6.3           
## [103] pillar_1.9.0           norm_1.0-11.1          lifecycle_1.0.3       
## [106] BiocManager_1.30.22    jquerylib_0.1.4        MALDIquant_1.22.1     
## [109] GlobalOptions_0.1.2    bitops_1.0-7           httpuv_1.6.11         
## [112] R6_2.5.1               pcaMethods_1.90.0      affy_1.76.0           
## [115] promises_1.2.1         gridExtra_2.3          codetools_0.2-19      
## [118] MASS_7.3-60            assertthat_0.2.1       openssl_2.1.0         
## [121] rjson_0.2.21           withr_2.5.0            GenomeInfoDbData_1.2.9
## [124] parallel_4.2.3         hms_1.1.3              grid_4.2.3            
## [127] tidyr_1.3.0            rmarkdown_2.24         carData_3.0-5         
## [130] shiny_1.7.5